//-----------------------------------------------------------------------------
//
//Copyright(c) 2020, ThorsianWay Technologies Co, Ltd
//All rights reserved.
//
//IP Name       :   raster
//File Name     :   BTE.v
//Module name   :   BTE
//Full name     :   block traversal element for triangle
//
//Author        :   zha daolu
//Email         :   
//Date          :   2020/6/1
//Version       :   V1.00
//
//Abstract      :   
//                  
//Called  by    :   GPU
//
//Modification history
//-----------------------------------------------------
//1.00: initial version 
//
//-----------------------------------------------------------------------------

//-----------------------------
//DEFINE MACRO
//-----------------------------  
module BTE
(
    input        clk,                              //input clock
    input        rst_n,                            //input reset, low active
    input        busy,                             //input busy (forwarded to pix_scan)
    input        bte_start,                        //input traversal start
    input [31:0] tile_x,                           //input cur tile left-bottom x coordinate, fixed 16.16
    input [31:0] tile_y,                           //input cur tile left-bottom y coordinate, fixed 16.16
    input [47:0] vertex0_x               ,         //input vertex0 x coordinate, fixed 1.31.16
    input [47:0] vertex0_y               ,         //input vertex0 y coordinate, fixed 1.31.16
    input [47:0] vertex0_z               ,         //input vertex0 z coordinate, fixed 1.31.16
    input [47:0] vertex0_w               ,         //input vertex0 w coordinate, fixed 1.31.16
    input [31:0] vertex0_s0              ,         //input vertex0 s coordinate, fixed 1.15.16
    input [31:0] vertex0_t0              ,         //input vertex0 t coordinate, fixed 1.15.16
    input [7:0]  vertex0_primary_r       ,         //input vertex0 color red channel
    input [7:0]  vertex0_primary_g      ,          //input vertex0 color green channel
    input [7:0]  vertex0_primary_b      ,          //input vertex0 color blue channel
    input [7:0]  vertex0_primary_a      ,          //input vertex0 color alpha channel
    input [47:0] vertex1_x               ,         //input vertex1 x coordinate, fixed 1.31.16
    input [47:0] vertex1_y               ,         //input vertex1 y coordinate, fixed 1.31.16
    input [47:0] vertex1_z               ,         //input vertex1 z coordinate, fixed 1.31.16
    input [47:0] vertex1_w               ,         //input vertex1 w coordinate, fixed 1.31.16
    input [31:0] vertex1_s0              ,         //input vertex1 s coordinate, fixed 1.15.16
    input [31:0] vertex1_t0              ,         //input vertex1 t coordinate, fixed 1.15.16
    input [7:0]  vertex1_primary_r       ,         //input vertex1 color red channel
    input [7:0]  vertex1_primary_g      ,          //input vertex1 color green channel
    input [7:0]  vertex1_primary_b      ,          //input vertex1 color blue channel
    input [7:0]  vertex1_primary_a      ,          //input vertex1 color alpha channel
    input [47:0] vertex2_x               ,         //input vertex2 x coordinate, fixed 1.31.16
    input [47:0] vertex2_y               ,         //input vertex2 y coordinate, fixed 1.31.16
    input [47:0] vertex2_z               ,         //input vertex2 z coordinate, fixed 1.31.16
    input [47:0] vertex2_w               ,         //input vertex2 w coordinate, fixed 1.31.16
    input [31:0] vertex2_s0              ,         //input vertex2 s coordinate, fixed 1.15.16
    input [31:0] vertex2_t0              ,         //input vertex2 t coordinate, fixed 1.15.16
    input [7:0]  vertex2_primary_r       ,         //input vertex2 color red channel
    input [7:0]  vertex2_primary_g      ,          //input vertex2 color green channel
    input [7:0]  vertex2_primary_b      ,          //input vertex2 color blue channel
    input [7:0]  vertex2_primary_a      ,          //input vertex2 color alpha channel
    output [32:0] dz_02                        ,   //output difference of z0 , z2
    output [9:0]  dprimary_r_02                ,   //output difference of r0 , r2
    output [9:0]  dprimary_g_02                ,   //output difference of g0 , g2
    output [9:0]  dprimary_b_02                ,   //output difference of b0 , b2
    output [9:0]  dprimary_a_02                ,   //output difference of a0 , a2
    output [32:0] dz_12                        ,   //output difference of z1 , z2
    output [9:0]  dprimary_r_12                ,   //output difference of r1 , r2
    output [9:0]  dprimary_g_12                ,   //output difference of g1 , g2
    output [9:0]  dprimary_b_12                ,   //output difference of b1 , b2
    output [9:0]  dprimary_a_12                ,   //output difference of a1 , a2
    output reg [31:0]         x_out,               //output overlap x coordinate
    output reg [31:0]         y_out,               //output overlap y coordinate
    output reg                xy_out_en,           //output overlap pixel valid
    output signed [63:0]      ee0,                 //output pixel edge function 0
    output signed [63:0]      ee1,                 //output pixel edge function 1
    output signed [63:0]      ee2,                 //output pixel edge function 2
    output  [95:0] area                        ,   //output triangle area x 2
    output  [31:0] z_slope                     ,   //output z slope
    // The six deltas below are driven directly by the set_up instance, so they
    // must be nets (an instance output may not drive a reg in Verilog).
    // NOTE(review): descriptions follow the port names and the w2/s2/t2
    // comments; the previous comments appeared shifted by one - confirm in set_up.
    output signed [31:0] ds_02                 ,   //output s0/w0 - s2/w2 (presumed)
    output signed [31:0] dt_02                 ,   //output t0/w0 - t2/w2 (presumed)
    output signed [31:0] dw_02                 ,   //output 1/w0  - 1/w2  (presumed)
    output signed [31:0] ds_12                 ,   //output s1/w1 - s2/w2 (presumed)
    output signed [31:0] dt_12                 ,   //output t1/w1 - t2/w2 (presumed)
    output signed [31:0] dw_12                 ,   //output 1/w1  - 1/w2  (presumed)
    output  signed [31:0] w2                   ,   //output  1/w2
    output  signed [31:0] s2                   ,   //output s2/w2
    output  signed [31:0] t2                   ,   //output t2/w2
    output                          mode_end,      //output traversal end (1-cycle pulse)
    output reg                     bte_busy        //output busy
);

//-----------------------------
// Internal nets
//-----------------------------
wire signed[47:0] a0;   //edge function parameter
wire signed[47:0] b0;   //edge function parameter
wire signed[47:0] a1;   //edge function parameter
wire signed[47:0] b1;   //edge function parameter
wire signed[47:0] a2;   //edge function parameter
wire signed[47:0] b2;   //edge function parameter

wire signed[95:0] ee0_int;  //edge function value for tile left-bottom pixel
wire signed[95:0] ee1_int;  //edge function value for tile left-bottom pixel
wire signed[95:0] ee2_int;  //edge function value for tile left-bottom pixel

wire                            block_start;  //set up done, block traversal begin
wire [31:0]                     tile_x_out;   //tile left-bottom pixel coordinate
wire [31:0]                     tile_y_out;   //tile left-bottom pixel coordinate

wire[7:0]                       position_pix;     //8-bit pixel in-tile code
wire                            pix_en;           //pixel overlap
wire                            block_scan_end;   //block traversal end

wire                            pix_fifo_empty;   //overlap block fifo empty
wire                            pix_fifo_full;    //overlap block fifo full
wire                            pix_fifo_rd;      //overlap block fifo read
wire[7:0]                       pix_fifo_out;     //overlap block fifo output

wire[3:0]                       pix_x_out;        //overlap pixel in-tile coordinate
wire[3:0]                       pix_y_out;        //overlap pixel in-tile coordinate
wire                            pix_xy_out_en;    //overlap pixel valid

wire                            pix_end;          //pixel scan end

//-----------------------------
// Stage 1: triangle set up
// Takes the three vertices and the tile origin; supplies the edge
// parameters, start-of-traversal strobe and attribute deltas used below.
//-----------------------------
set_up uut_set_up
(
    .clk                (clk),
    .rst_n              (rst_n),
    .bte_start          (bte_start),
    .tile_x             (tile_x),
    .tile_y             (tile_y),
    .vertex0_x          (vertex0_x),
    .vertex0_y          (vertex0_y),
    .vertex0_z          (vertex0_z),
    .vertex0_w          (vertex0_w),
    .vertex0_s0         (vertex0_s0),
    .vertex0_t0         (vertex0_t0),
    .vertex0_primary_r  (vertex0_primary_r),
    .vertex0_primary_g  (vertex0_primary_g),
    .vertex0_primary_b  (vertex0_primary_b),
    .vertex0_primary_a  (vertex0_primary_a),
    .vertex1_x          (vertex1_x),
    .vertex1_y          (vertex1_y),
    .vertex1_z          (vertex1_z),
    .vertex1_w          (vertex1_w),
    .vertex1_s0         (vertex1_s0),
    .vertex1_t0         (vertex1_t0),
    .vertex1_primary_r  (vertex1_primary_r),
    .vertex1_primary_g  (vertex1_primary_g),
    .vertex1_primary_b  (vertex1_primary_b),
    .vertex1_primary_a  (vertex1_primary_a),
    .vertex2_x          (vertex2_x),
    .vertex2_y          (vertex2_y),
    .vertex2_z          (vertex2_z),
    .vertex2_w          (vertex2_w),
    .vertex2_s0         (vertex2_s0),
    .vertex2_t0         (vertex2_t0),
    .vertex2_primary_r  (vertex2_primary_r),
    .vertex2_primary_g  (vertex2_primary_g),
    .vertex2_primary_b  (vertex2_primary_b),
    .vertex2_primary_a  (vertex2_primary_a),
    .ee0_int            (ee0_int),
    .ee1_int            (ee1_int),
    .ee2_int            (ee2_int),
    .a0                 (a0),
    .b0                 (b0),
    .a1                 (a1),
    .b1                 (b1),
    .a2                 (a2),
    .b2                 (b2),
    .block_start        (block_start),
    .tile_x_out         (tile_x_out),
    .tile_y_out         (tile_y_out),
    .dz_02              (dz_02),
    .dprimary_r_02      (dprimary_r_02),
    .dprimary_g_02      (dprimary_g_02),
    .dprimary_b_02      (dprimary_b_02),
    .dprimary_a_02      (dprimary_a_02),
    .dz_12              (dz_12),
    .dprimary_r_12      (dprimary_r_12),
    .dprimary_g_12      (dprimary_g_12),
    .dprimary_b_12      (dprimary_b_12),
    .dprimary_a_12      (dprimary_a_12),
    .area               (area),
    .z_slope            (z_slope),
    .ds0_s2             (ds_02),
    .dt0_t2             (dt_02),
    .dw0_w2             (dw_02),
    .ds1_s2             (ds_12),
    .dt1_t2             (dt_12),
    .dw1_w2             (dw_12),
    .w2                 (w2),
    .s2                 (s2),
    .t2                 (t2)
);

//-----------------------------
// Stage 2: scan block, output overlap 2x2 block
// Back-pressured by the FIFO full flag rather than the external busy input.
//-----------------------------
block_scan uut_block_scan
(
    .clk           (clk),
    .rst_n         (rst_n),
    .busy          (pix_fifo_full),
    .block_start   (block_start),
    .a0            (a0),
    .b0            (b0),
    .a1            (a1),
    .b1            (b1),
    .a2            (a2),
    .b2            (b2),
    .ee0_int       (ee0_int),
    .ee1_int       (ee1_int),
    .ee2_int       (ee2_int),
    .position_pix  (position_pix),
    .pix_en        (pix_en),
    .block_scan_end(block_scan_end)
);

//-----------------------------
// Stage 3: store 2x2 overlap block
// Written by block_scan (pix_en), read by pix_scan (pix_fifo_rd).
//-----------------------------
fifo#(
    .DWIDTH(8),
    .DSIZE(2)
    )   pix_fifo
(
    .SCAN_mode  (1'b0),
    .datain     (position_pix),
    .rd         (pix_fifo_rd),
    .wr         (pix_en),
    .rst_n      (rst_n),
    .clk        (clk),
    .almost_full(),                 // intentionally left unconnected
    .dataout    (pix_fifo_out),
    .full       (pix_fifo_full),
    .empty      (pix_fifo_empty)
);

//-----------------------------
// Stage 4: scan overlap 2x2 block, output overlap pixel
// The external busy input stalls this stage.
//-----------------------------
pix_scan uut_pix_scan
(
    .clk       (clk),
    .rst_n     (rst_n),
    .busy      (busy),
    .block_end (block_scan_end),
    .fifo_empty(pix_fifo_empty),
    .fifo_out  (pix_fifo_out),
    .a0        (a0),
    .b0        (b0),
    .a1        (a1),
    .b1        (b1),
    .a2        (a2),
    .b2        (b2),
    .ee0_int   (ee0_int),
    .ee1_int   (ee1_int),
    .ee2_int   (ee2_int),
    .x_out     (pix_x_out),
    .y_out     (pix_y_out),
    .xy_out_en (pix_xy_out_en),
    .ee0       (ee0),
    .ee1       (ee1),
    .ee2       (ee2),
    .fifo_rd   (pix_fifo_rd),
    .pix_end   (pix_end)
);

//-----------------------------
// Convert in-tile coordinate to screen coordinate (combinational).
// Result = upper 12 bits of the tile origin, the 4-bit in-tile pixel index,
// and 16 zero bits in the low half (12 + 4 + 16 = 32 bits).
//-----------------------------
always@(*)
begin
     x_out      = {tile_x_out[31:20],pix_x_out,16'b0};
     y_out      = {tile_y_out[31:20],pix_y_out,16'b0};
     xy_out_en  = pix_xy_out_en;
end

//-----------------------------
// End-of-traversal detection
//-----------------------------
reg             end_start_flag;       // set once block_start has been seen for this tile
reg             end_start_flag_ff1;   // end_start_flag delayed by one clock
reg             end_end_flag;         // set once mode_end has fired for this tile

// Remember that traversal has started; cleared by every new bte_start.
always@(posedge clk or negedge rst_n)
begin
    if(!rst_n)
        end_start_flag <= 1'b0;
    else if(bte_start)
        end_start_flag <= 1'b0;
    else if(block_start)
        end_start_flag <= 1'b1;
end

// One-cycle delayed copy of end_start_flag, used to qualify mode_end.
always@(posedge clk or negedge rst_n)
begin
    if(!rst_n)
        end_start_flag_ff1 <= 1'b0;
    else if(bte_start)
        end_start_flag_ff1 <= 1'b0;
    else
        end_start_flag_ff1 <= end_start_flag;
end

// Latches after mode_end so that mode_end is only one cycle wide.
always@(posedge clk or negedge rst_n)
begin
    if(!rst_n)
        end_end_flag <= 1'b0;
    else if(bte_start)
        end_end_flag <= 1'b0;
    else if(mode_end)
        end_end_flag <= 1'b1;
end

//tile scan done, 1 cycle: both scanners report end, traversal was started,
//and the pulse has not already been issued for this tile
assign      mode_end    = block_scan_end && pix_end && end_start_flag_ff1 && (~end_end_flag);

//bte busy: set by bte_start, cleared by mode_end (mode_end has priority)
always@(posedge clk or negedge rst_n)
begin
    if(~rst_n)
        bte_busy <= 0;
    else if(mode_end)
        bte_busy <= 0;
    else if(bte_start)
        bte_busy <= 1;
end

endmodule
